rm(list=ls())
library("foreign")
library("readstata13")
library("xlsx")
library("reshape2")
library("lubridate")

case_province <- read.dta13("/Users/Mandywu/Dropbox (MIT)/PNAS revision/code/case_summary_province.dta")
case_province <- case_province[-32,]

inspection <- as.data.frame(matrix(nrow=31,ncol=5))
names(inspection) <- c("province_name","batch","start_date","end_date","省份_Eng")
inspection$province_name <- c("上海市","云南省","内蒙古自治区","北京市","吉林省","四川省","天津市",          
                              "宁夏回族自治区","安徽省","山东省","山西省","广东省","广西壮族自治区","新疆维吾尔自治区",
                              "江苏省","江西省","河北省","河南省","浙江省","海南省","湖北省",
                              "湖南省","甘肃省","福建省","西藏自治区","贵州省","辽宁省",
                              "重庆市","陕西省","青海省","黑龙江省")
inspection$省份_Eng  <- c("Shanghai","Yunnan","Nei Mongol","Beijing","Jilin","Sichuan","Tianjin","Ningxia Hui","Anhui",
                        "Shandong","Shanxi","Guangdong","Guangxi","Xinjiang Uygur","Jiangsu","Jiangxi","Hebei","Henan",
                        "Zhejiang","Hainan","Hubei","Hunan","Gansu","Fujian","Xizang","Guizhou","Liaoning","Chongqing",
                        "Shaanxi","Qinghai","Heilongjiang")


inspection[inspection$province_name=="河北省",]$batch <- 0
inspection[inspection$province_name=="河北省",]$start_date <- c("2015-12-31")
inspection[inspection$province_name=="河北省",]$end_date <- c("2016-02-14")

inspection[inspection$province_name=="内蒙古自治区"|inspection$province_name=="宁夏回族自治区"|inspection$province_name=="广西壮族自治区"|
             inspection$province_name=="黑龙江省"|inspection$province_name=="江苏省"|inspection$province_name=="江西省"|
             inspection$province_name=="河南省"|inspection$province_name=="云南省",]$batch <- 1
inspection[inspection$province_name=="内蒙古自治区"|inspection$province_name=="宁夏回族自治区"|inspection$province_name=="广西壮族自治区"|
             inspection$province_name=="黑龙江省"|inspection$province_name=="江苏省"|inspection$province_name=="江西省"|
             inspection$province_name=="河南省"|inspection$province_name=="云南省",]$start_date <- c("2016-07-12")
inspection[inspection$province_name=="内蒙古自治区"|inspection$province_name=="宁夏回族自治区"|inspection$province_name=="广西壮族自治区"|
             inspection$province_name=="黑龙江省"|inspection$province_name=="江苏省"|inspection$province_name=="江西省"|
             inspection$province_name=="河南省"|inspection$province_name=="云南省",]$end_date <- c("2016-08-19")

inspection[inspection$province_name=="北京市"|inspection$province_name=="上海市"|inspection$province_name=="重庆市"|
             inspection$province_name=="湖北省"|inspection$province_name=="广东省"|inspection$province_name=="陕西省"|
             inspection$province_name=="甘肃省",]$batch <- 2
inspection[inspection$province_name=="北京市"|inspection$province_name=="上海市"|inspection$province_name=="重庆市"|
             inspection$province_name=="湖北省"|inspection$province_name=="广东省"|inspection$province_name=="陕西省"|
             inspection$province_name=="甘肃省",]$start_date <- c("2016-11-24")
inspection[inspection$province_name=="北京市"|inspection$province_name=="上海市"|inspection$province_name=="重庆市"|
             inspection$province_name=="湖北省"|inspection$province_name=="广东省"|inspection$province_name=="陕西省"|
             inspection$province_name=="甘肃省",]$end_date <- c("2016-12-30")

inspection[inspection$province_name=="天津市"|inspection$province_name=="山西省"|inspection$province_name=="辽宁省"|
             inspection$province_name=="安徽省"|inspection$province_name=="福建省"|inspection$province_name=="湖南省"|
             inspection$province_name=="贵州省",]$batch <- 3
inspection[inspection$province_name=="天津市"|inspection$province_name=="山西省"|inspection$province_name=="辽宁省"|
             inspection$province_name=="安徽省"|inspection$province_name=="福建省"|inspection$province_name=="湖南省"|
             inspection$province_name=="贵州省",]$start_date <- c("2017-04-24")
inspection[inspection$province_name=="天津市"|inspection$province_name=="山西省"|inspection$province_name=="辽宁省"|
             inspection$province_name=="安徽省"|inspection$province_name=="福建省"|inspection$province_name=="湖南省"|
             inspection$province_name=="贵州省",]$end_date <- c("2017-05-28")

inspection[is.na(inspection$batch),]$batch <-4
inspection[is.na(inspection$start_date),]$start_date <- c("2017-08-07")
inspection[is.na(inspection$end_date),]$end_date <- c("2017-09-15")

case_province <- merge(case_province,inspection,all.x = TRUE,all.y = TRUE,by=c("省份_Eng"))

#### read in province level summary statistics
GDP_province <- read.xlsx(file=paste("/Users/Mandywu/Dropbox (MIT)/PNAS revision/code/GDP.xlsx"),2)
names(GDP_province)[6] <- c("Nei Mongol")
names(GDP_province)[31:32]<-c("Ningxia Hui","Xinjiang Uygur")
GDP_province$year <- year(GDP_province$NA.)
GDP_province <- GDP_province[,-1]
GDP_province <- melt(GDP_province,id="year")
names(GDP_province)<-c("year","province","GDP")

Pop_province <- read.xlsx(file=paste("/Users/Mandywu/Dropbox (MIT)/PNAS revision/code/Population.xlsx"),2)
names(Pop_province)[17] <- c("Nei Mongol")
names(Pop_province)[31:32]<-c("Ningxia Hui","Xinjiang Uygur")
Pop_province$year <- year(Pop_province$NA.)
Pop_province <- Pop_province[,-1]
Pop_province <- melt(Pop_province,id="year")
names(Pop_province)<-c("year","province","Pop")

Stat_province <- merge(GDP_province,Pop_province,all.x = TRUE,all.y = TRUE,by=c("year","province"))
Stat_province <- Stat_province[Stat_province$year==2016,]
Stat_province$per_capital <- Stat_province$GDP*1000000000/(Stat_province$Pop*1000000)
Stat_province <- Stat_province[,-1]
names(Stat_province)[1] <- c("省份_Eng")
summary_province <- merge(case_province,Stat_province,all.x = TRUE,all.y = TRUE,by=c("省份_Eng"))
summary_province <- summary_province[,-2]
summary_province <- summary_province[,-7]
summary_province <- summary_province[,-10]

names(summary_province)[1:9] <- c("province_name_eng","case_closed","ordering_rectification","shut_down",
                                   "filing_case","fine(million CNY)","detension","interview","accountability")
summary_province <- summary_province[,c("province_name","province_name_eng","batch","start_date","end_date","GDP","Pop",
                                        "per_capital","case_closed","ordering_rectification","shut_down",
                                        "filing_case","fine(million CNY)","detension","interview","accountability")]
batch_stat <- summary_province[,-(1:2)]
batch_stat <- batch_stat[,-(2:3)]
batch_output <- aggregate(batch_stat[,2:12], by=list(batch_stat$batch), mean)

library(data.table)
#batch_output <- t(batch_output)
t_batch_output <- transpose(batch_output)

# get row and colnames in order
colnames(t_batch_output) <- rownames(batch_output)
rownames(t_batch_output) <- colnames(batch_output)
t_batch_output <- t_batch_output[-1,]
t_batch_output[,1:5] <-round(t_batch_output[,1:5],0)
numer_of_provinces <-  c(nrow(inspection[inspection$batch==0,]),nrow(inspection[inspection$batch==1,]),nrow(inspection[inspection$batch==2,]),
                         nrow(inspection[inspection$batch==3,]),nrow(inspection[inspection$batch==4,]))
start_date <- c("2015/12/31","2016/07/12","2016/11/24","2017/04/24","2017/08/07")
end_date <- c("2016/02/14","2016/08/19","2016/12/30","2017/05/28","2017/09/15")
t_batch_output <- rbind(end_date,t_batch_output)
t_batch_output <- rbind(start_date,t_batch_output)
t_batch_output <- rbind(numer_of_provinces,t_batch_output)

print(xtable(t_batch_output,digits=0), include.rownames=TRUE)



SO2_concentration <- read.dta("/Users/Mandywu/Dropbox (MIT)/PNAS revision/code/panel_prefec_weekly.dta")
SO2_before <- SO2_concentration[SO2_concentration$inspection_post==0,]
SO2_ave <- aggregate(concentration ~ province_name, data = SO2_before, FUN= "mean", na.rm=TRUE)
SO2_ave <- merge(SO2_ave,inspection,all.x = TRUE,all.y = TRUE,by=c("province_name"))
SO2_ave_batch <- aggregate(concentration ~ batch, data = SO2_ave, FUN= "mean", na.rm=TRUE)
SO2_ave_batch <- t(SO2_ave_batch)
print(xtable(SO2_ave_batch,digits=0), include.rownames=TRUE)



### code for table 2
data_all <- read.dta13("/Users/Mandywu/Dropbox (MIT)/PNAS revision/code/campaign_sum_season.dta")

sum1 <- aggregate(concentration ~ period + campaign_treated, data_all, function(x) c(mean = mean(x)))
sum2 <- aggregate(concentration ~ period + campaign_treated, data_all, function(x) c(sd = sd(x)))
sum <- merge(sum1,sum2,all.x = TRUE,all.y = TRUE,by=c("period","campaign_treated"))

library(xtable)
xtable(sum)

t.test(sum_city[sum_city$period==3&sum_city$campaign_treated==1,]$concentration-
         sum_city[sum_city$period==1&sum_city$campaign_treated==1,]$concentration, 
       sum_city[sum_city$period==3&sum_city$campaign_treated==0,]$concentration-
         sum_city[sum_city$period==1&sum_city$campaign_treated==0,]$concentration, var.equal=TRUE)
